/* File 1 of 2 for Appendix Figures 2 and 3 */


#delimit ;

* Start from an empty session and switch off output paging;
clear all;
set more off;

* Stem used for the name of the results dataset saved at the end of this do-file;
local outfile "GWgapr10_pfbyind_TL";

* Time stamp marking the start of the run;
display _newline "$S_DATE $S_TIME";







********************************************************************************;
* Setting up locals for regs;

* Labour-input measure used in this run. Further down it is copied into the local lab,
* which selects the L_fte_del_pf and L_fte_del_wb expressions defined here;
local type fte_del; 


******************************;
* Specifying the labour input;	

* head count*(average fte)^delta;

* Labour aggregate for the lngo equation (suffix pf). The text is pasted into the nlsur
* call, where a name in braces is a parameter to estimate and =value on its first
* occurrence is the starting value. For each age group the aggregate adds male head count
* times average fte to the power delta and the same female term scaled by phi_f. Age
* groups lt25, 40to54 and 55p carry their own multiplier and 25to39 has none. Every
* multiplier is written as sqrt(phi^2), which equals abs(phi), so it cannot be negative;
	
local L_fte_del_pf = "(
	sqrt({phi_lt25_pf=0.5}^2)  *(L_hc_m_lt25  *(av_fte_m_lt25)^{delta_pf=1} + sqrt({phi_f_pf=0.9}^2)*L_hc_f_lt25  *(av_fte_f_lt25)^{delta_pf})
	+ 							(L_hc_m_25to39*(av_fte_m_25to39)^{delta_pf} + sqrt({phi_f_pf}^2)    *L_hc_f_25to39*(av_fte_f_25to39)^{delta_pf})
	+ sqrt({phi_40to54_pf=1}^2)*(L_hc_m_40to54*(av_fte_m_40to54)^{delta_pf} + sqrt({phi_f_pf}^2)    *L_hc_f_40to54*(av_fte_f_40to54)^{delta_pf})
	+ sqrt({phi_55p_pf=1}^2)   *(L_hc_m_55p   *(av_fte_m_55p)^{delta_pf}    + sqrt({phi_f_pf}^2)    *L_hc_f_55p   *(av_fte_f_55p)^{delta_pf})
	)";
	
* Same aggregate for the lnWB equation (suffix wb), with its own set of parameters and
* the same starting values;
local L_fte_del_wb = "(
	sqrt({phi_lt25_wb=0.5}^2)  *(L_hc_m_lt25  *(av_fte_m_lt25)^{delta_wb=1} + sqrt({phi_f_wb=0.9}^2)*L_hc_f_lt25  *(av_fte_f_lt25)^{delta_wb})
	+ 							(L_hc_m_25to39*(av_fte_m_25to39)^{delta_wb} + sqrt({phi_f_wb}^2)    *L_hc_f_25to39*(av_fte_f_25to39)^{delta_wb})
	+ sqrt({phi_40to54_wb=1}^2)*(L_hc_m_40to54*(av_fte_m_40to54)^{delta_wb} + sqrt({phi_f_wb}^2)    *L_hc_f_40to54*(av_fte_f_40to54)^{delta_wb})
	+ sqrt({phi_55p_wb=1}^2)   *(L_hc_m_55p   *(av_fte_m_55p)^{delta_wb}    + sqrt({phi_f_wb}^2)    *L_hc_f_55p   *(av_fte_f_55p)^{delta_wb})
	)";

* NOTE(review): no local named vars_fte is defined in the visible part of this do-file,
* so vars_fte_del expands to nothing here, and it is not referenced again below. Confirm
* whether this line is a leftover that can be removed;
local vars_fte_del "`vars_fte'";

* Text description of the labour input. The macro name ends in an extra l on purpose:
* the dataset notes written at the end of the file look it up as L_ + lab + l;
local L_fte_dell "Sum of head count*(average FTEs)^{delta} for all labour types";

	



	
	
	
	
********************************************************************************;
* Reading in data;

* using restricted set of consistent obs;

* Load only the observations with Q_rest equal to 1;
use if Q_rest==1 using GWgap_pr_firm_v4, clear;

* Drop every variable with ten in its name and the INpf_ variables already in the file;
drop *ten* INpf_*;

* Indicator variables for ind2, created with the prefix IN;
xi i.ind2, prefix(IN);

* Constant term, entered explicitly in the linear part of both nlsur equations;
generate con = 1;

* Full list of candidate control variables, expanded from the wildcards;
unab extra_ctls : multiplant WPc_* Yyear* INind*;






*******************************************************************************;
* Setting up matrices to collect coefficients etc;

/*
* The regressions are run separately for each industry and the results are
* collected in 3 matrices, each with one row per industry.

* Matrix C (coefficient matrix):
* * row name: industry code
* * col 1: empty
* * col 2-4: coeffs on lnM, lnK, lnL in production fn for industry
* * col 5-7 (prev 5): coeffs on lnM, lnK, lnL in wage fn for industry
* * col 8-9 (prev 6-7): phi_fs in production fn and wage fn in industry
* * col 10-15: phi_ages in production fn (10-12) and then in wage fn (13-15)
* * col 16-17: deltas in production fn and wage fn

* Matrix SE (standard error matrix):
* * row name: industry code
* * col 1: empty
* * col 2-4: se's on lnM, lnK, lnL in production fn for industry
* * col 5-7 (prev 5): se's on lnM, lnK, lnL in wage fn for industry
* * col 8-9 (prev 6-7): se's on phi_fs in production fn and wage fn in industry
* * col 10 (prev 8): se on (1 - phi_wb/phi_pf)
* * col 11-16: se's on phi_ages in production fn (11-13) and then in wage fn (14-16)
* * col 17-18: se's on deltas in production fn and wage fn

* Matrix Obs (observations matrix):
* * row name: industry code
* * col 1: empty
* * col 2: number of observations in industry
*/

* Distinct values of pf_ind and how many there are. Each value gets one estimation
* and one row in every results matrix;
levelsof pf_ind, local(ind3s);
local num_ind3: list sizeof ind3s;
local matrows = `num_ind3';

* Results matrices, filled with missing values to start with;
matrix C   = J(`matrows',17,.);
matrix SE  = J(`matrows',18,.);
matrix Obs = J(`matrows',2,.);

* Collect the row labels and store the number of observations of each industry;
local rownames "";
local row 0;
foreach ind of local ind3s {;
	local ++row;
	local rownames "`rownames' `ind'";

	quietly summarize con if pf_ind=="`ind'";
	matrix Obs[`row',2] = r(N);
};

* All three matrices share the same row labels;
foreach m in C SE Obs {;
	matrix rownames `m' = `rownames';
};

* Column labels. SE has one extra column, discrim, between the phi_f and the age columns;
local lead_cols "none lnM_pf lnK_pf lnL_pf lnM_wb lnK_wb lnL_wb phi_f_pf phi_f_wb";
local tail_cols "phi_lt25_pf phi_40to54_pf phi_55p_pf phi_lt25_wb phi_40to54_wb phi_55p_wb delta_pf delta_wb";
matrix colnames C = `lead_cols' `tail_cols';
matrix colnames SE = `lead_cols' discrim `tail_cols';
matrix colnames Obs = none obs;

* The way labour input is measured;
local lab `type';

* Weighting variable that goes with the labour measure;
if "`lab'"=="hc" local wgtvar L_hc_t;
if inlist("`lab'", "fte", "ftpt", "fte_del", "hrs", "hrs_del") local wgtvar L_fte_t;


 


* Row of C and SE that belongs to the current industry. It is advanced once at the top
* of each pass, so an industry that is skipped still keeps its own all-missing row and
* the rows stay aligned with the row names;
local row 0;

foreach ind in `ind3s' {; * looping over pf_ind industries;

	local row = `row' + 1;

	di as input _n _n "$S_DATE $S_TIME: Starting regression for `ind'";
				
	
	* keeping only the subset of controls that vary within industry in question;
	local extra_ctls2 "";
	foreach var in `extra_ctls' {;
		qui sum `var' if pf_ind=="`ind'";
		if r(sd)>0 & r(sd)<. local extra_ctls2 "`extra_ctls2' `var'";
	};
	
	* listing industry FE in extra_ctls2;
	di "Extra controls2: `extra_ctls2'";
	
	* splitting the surviving controls into industry dummies and everything else;
	local indvars "";
	local nonindvars "";
	foreach var in `extra_ctls2' {;
		if strpos("`var'","INind")==1 local indvars "`indvars' `var'";
		if strpos("`var'","INind")!=1 local nonindvars "`nonindvars' `var'";
	};
	
	* omitting the first industry FE, which becomes the base category next to con;
	
	di "indvars `indvars'";
	local ind1: word 1 of `indvars';
	local indvars: list indvars - ind1;
	di "indvars `indvars'";
	di "nonindvars `nonindvars'";
	local extra_ctls2: list nonindvars | indvars;
	di "Extra controls2: `extra_ctls2'";

	
	
	* regs: lngo and lnWB equations estimated jointly on the current industry only;
	
	capture noisily nlsur (lngo = {lnL_pf=1}*ln(`L_`lab'_pf')	
			+ {lnLsq_pf=1}*ln(`L_`lab'_pf')*ln(`L_`lab'_pf')/100
			+ {lnL_lnK_pf=1}*ln(`L_`lab'_pf')*lnK/100
			+ {lnL_lnM_pf=1}*ln(`L_`lab'_pf')*lnM/100
		
			+ {lnK_pf=1}*lnK
			+ {lnKsq_pf=1}*lnKsq
			+ {lnK_lnM_pf=1}*lnK_lnM
			
			+ {lnM_pf=1}*lnM
			+ {lnMsq_pf=1}*lnMsq
			+ {xb_pf: `extra_ctls2' con})
		(lnWB = {lnL_wb=1}*ln(`L_`lab'_wb')	
			+ {lnLsq_wb=1}*ln(`L_`lab'_wb')*ln(`L_`lab'_wb')/100
			+ {lnL_lnK_wb=1}*ln(`L_`lab'_wb')*lnK/100
			+ {lnL_lnM_wb=1}*ln(`L_`lab'_wb')*lnM/100
		
			+ {lnK_wb=1}*lnK
			+ {lnKsq_wb=1}*lnKsq
			+ {lnK_lnM_wb=1}*lnK_lnM
			
			+ {lnM_wb=1}*lnM
			+ {lnMsq_wb=1}*lnMsq
			+ {xb_wb: `extra_ctls2' con})
		[aweight = `wgtvar']
		if pf_ind=="`ind'", cluster(pent) iter(200);

	* capture hides the return code, so keep it before any other command runs;
	local rc = _rc;

	* capture also swallows a user Break, so pass it on instead of moving to the next industry;
	if `rc'==1 error 1;

	* nlsur stopped with an error. e() may still hold the estimates of the previous
	* industry in that case, so nothing is read from it and the row stays missing;
	if `rc'!=0 {;
		di as error _n "Not saving results for `ind' because nlsur exited with return code `rc'" _n;
		continue;
	};
		
	di as input "Command just run: " e(cmdline);
	
	* do not save output unless nlsur reports convergence;
	if e(converged)!=1 {;
		di _n "Not saving results for `ind' because it did not converge in 200 iterations" _n;
		continue;
	};
	
	
	* saving regression results to matrices;
	
	matrix C[`row',2] = _b[/lnM_pf];
	matrix C[`row',3] = _b[/lnK_pf];
	matrix C[`row',4] = _b[/lnL_pf];
	matrix C[`row',5] = _b[/lnM_wb];
	matrix C[`row',6] = _b[/lnK_wb];
	matrix C[`row',7] = _b[/lnL_wb];
	matrix C[`row',8] = _b[/phi_f_pf];
	matrix C[`row',9] = _b[/phi_f_wb];
	matrix C[`row',10] = _b[/phi_lt25_pf];
	matrix C[`row',11] = _b[/phi_40to54_pf];
	matrix C[`row',12] = _b[/phi_55p_pf];
	matrix C[`row',13] = _b[/phi_lt25_wb];
	matrix C[`row',14] = _b[/phi_40to54_wb];
	matrix C[`row',15] = _b[/phi_55p_wb];
	* the delta entries are captured so that a missing delta parameter is not an error;
	capture matrix C[`row',16] = _b[/delta_pf];
	capture matrix C[`row',17] = _b[/delta_wb];
	
	
	matrix SE[`row',2] = _se[/lnM_pf];
	matrix SE[`row',3] = _se[/lnK_pf];
	matrix SE[`row',4] = _se[/lnL_pf];
	matrix SE[`row',5] = _se[/lnM_wb];
	matrix SE[`row',6] = _se[/lnK_wb];
	matrix SE[`row',7] = _se[/lnL_wb];
	matrix SE[`row',8] = _se[/phi_f_pf];
	matrix SE[`row',9] = _se[/phi_f_wb];

	* standard error of 1 - phi_f_wb/phi_f_pf, taken from the variance returned by nlcom;
	nlcom 1 - _b[/phi_f_wb]/_b[/phi_f_pf];
	matrix V = r(V);
	local se = V[1,1]^0.5;
	matrix SE[`row',10] = `se';
	
	matrix SE[`row',11] = _se[/phi_lt25_pf];
	matrix SE[`row',12] = _se[/phi_40to54_pf];
	matrix SE[`row',13] = _se[/phi_55p_pf];
	matrix SE[`row',14] = _se[/phi_lt25_wb];
	matrix SE[`row',15] = _se[/phi_40to54_wb];
	matrix SE[`row',16] = _se[/phi_55p_wb];
	capture matrix SE[`row',17] = _se[/delta_pf];
	capture matrix SE[`row',18] = _se[/delta_wb];
};	


* converting saved results into a data set;

#delimit ;

* C and SE each become a dataset keyed on ind3, with every other column prefixed
* by c_ or se_ respectively. The unused first column is dropped;
foreach m in C SE {;
	local pre = lower("`m'");
	clear;
	svmat2 `m', names(col) rnames(ind3);
	rename * `pre'_*;
	drop `pre'_none;
	rename `pre'_ind3 ind3;
	save temp_`m', replace;
};

* Obs is the master dataset, coefficients and standard errors are merged on by industry;
clear;
svmat2 Obs, names(col) rnames(ind3);
drop none;

foreach part in temp_C temp_SE {;
	merge 1:1 ind3 using `part', nogenerate;
};


* Describe the contents in the dataset notes, then save under the run-specific name;
notes: `outfile'_res_`lab' created on $S_DATE $S_TIME.;
notes: `outfile'_res_`lab' is a set of regression results generated by `outfile'.do.
The regressions are run separately by productivity industry, but the phi_fs in the production and
wage equations are NOT allowed to vary by year. The regessions include phi_f and age
phis, but not interacted. Year FE and anzsic 2-digit industry FE are included. Regressions
are weighted by ftes, and
standard errors are clustered at the pent level. The sample is the consistent restricted subsample.;
notes: The labour specification is `L_`lab'l'.;
save `outfile'_res_`lab', replace;





